Last updated: 2025-11-15

Checks: 1 1

Knit directory: factor_analysis_new/

This reproducible R Markdown analysis was created with workflowr (version 1.7.0). The Checks tab describes the reproducibility checks that were applied when the results were created. The Past versions tab lists the development history.


The R Markdown file has unstaged changes. To know which version of the R Markdown file created these results, you’ll want to first commit it to the Git repo. If you’re still working on the analysis, you can ignore this warning. When you’re finished, you can run wflow_publish to commit the R Markdown file and build the HTML.

Great! You are using Git for version control. Tracking code development and connecting the code version to the results is critical for reproducibility.

The results in this page were generated with repository version c151392. See the Past versions tab to see a history of the changes made to the R Markdown and HTML files.

Note that you need to be careful to ensure that all relevant files for the analysis have been committed to Git prior to generating the results (you can use wflow_publish or wflow_git_commit). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated:


Unstaged changes:
    Modified:   analysis/revision_acat_selectedtraits.Rmd

Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes.


These are the previous versions of the repository in which changes were made to the R Markdown (analysis/revision_acat_selectedtraits.Rmd) and HTML (docs/revision_acat_selectedtraits.html) files. If you’ve configured a remote Git repository (see ?wflow_git_remote), click on the hyperlinks in the table below to view the files as they were in that past version.

File Version Author Date Message
html c151392 XSun 2025-11-14 update
Rmd 306f83a XSun 2025-11-14 update
html 306f83a XSun 2025-11-14 update
Rmd 928d43b XSun 2025-11-14 update
html 928d43b XSun 2025-11-14 update
Rmd e1a196e XSun 2025-11-14 update
html e1a196e XSun 2025-11-14 update
Rmd 3d5fa40 XSun 2025-11-13 update
html 3d5fa40 XSun 2025-11-13 update
Rmd ea8d0f3 XSun 2025-10-24 update
html ea8d0f3 XSun 2025-10-24 update

suppressMessages(library(ggplot2))
suppressMessages(library(gridExtra))
suppressMessages(library(tidyr))
suppressMessages(library(forcats))

source("/project/xinhe/xsun/pathway_factor/data_v2/traits_finalselection_SLEremoved.R")
#traits <- c(traits_EUR, traits_EAS)
traits <- c(traits_EUR)

celltypes <- c("B_cell","CD14_positive_monocyte","CD15_positive_leukocyte","platelet","T_cell","thymocyte")
type <- c("B cell (CD19+)","Monocyte (CD14+)","Leukocyte (CD15+)","Platelet","T cell (CD4+)","T cell (CD8+)")
names(type) <- celltypes
names(celltypes) <- type

qqplot <- function(pvalues,title=NULL) {
  
  pval <- pvalues[complete.cases(pvalues)]
  title <- title
  plotdata <- data.frame(observed = -log10(sort(as.numeric(pval))),
                         expected = -log10(ppoints(length(pval))))
  
  qq <- ggplot(plotdata) + theme_bw(base_line_size =0.3) +
    geom_point(aes(expected, observed), shape = 1, size = 1.5,color = "steelblue") +
    geom_abline(intercept = 0, slope = 1, alpha = 0.6, color = "red") +
    ggtitle(title) + theme(plot.title = element_text(hjust = 0.5)) +
    labs(x = expression(paste("Expected -log"[10],"(p-value)")),
         y = expression(paste("Observed -log"[10],"(p-value)"))) +
    
    theme(axis.title.x = element_text(size = 14),
          axis.text.x = element_text(size = 12, color = "black"),
          axis.title.y = element_text(size = 14),
          axis.text.y = element_text(size = 12, color = "black") )
  
  return(qq)
  
}

histogram <- function(pvalues,title=NULL,xlab =NULL) {
  
  p <- as.data.frame(pvalues)
  colnames(p) <- "p"
  
  plot <-  ggplot(p, aes(x=p)) + geom_histogram(breaks = seq(0, 1, by = 0.05),color="white",fill = "steelblue")+ 
    theme_bw(base_line_size =0.3) +
    ggtitle(title) + theme(plot.title = element_text(hjust = 0.5)) +
    labs(x = xlab,
         y = "Count") +
    
    theme(axis.title.x = element_text(size = 14),
          axis.text.x = element_text(size = 12, color = "black"),
          axis.title.y = element_text(size = 14),
          axis.text.y = element_text(size = 12, color = "black"),
          text= element_text(family="Times")) 
  
  return(plot)
  
}

qqplot_multi <- function(pvalues_list, legend_names = NULL, colors = NULL, title = NULL) {
  
  # Check if legend names are provided, else use default names
  if (is.null(legend_names) || length(legend_names) != length(pvalues_list)) {
    legend_names <- paste("Set", seq_along(pvalues_list))
  }
  
  # Check if colors are provided, else use rainbow colors
  if (is.null(colors) || length(colors) != length(pvalues_list)) {
    colors <- rainbow(length(pvalues_list))
  }
  
  # Initialize an empty list for storing data frames
  plotdata_list <- list()
  
  # Loop through each vector of p-values
  for (i in seq_along(pvalues_list)) {
    pval <- pvalues_list[[i]][complete.cases(pvalues_list[[i]])]
    data <- data.frame(
      observed = -log10(sort(as.numeric(pval))),
      expected = -log10(ppoints(length(pval))),
      set = legend_names[i]  # Use provided legend name
    )
    plotdata_list[[i]] <- data
  }
  
  # Combine all data frames into one
  plotdata <- do.call(rbind, plotdata_list)
  
  # Plotting
  qq <- ggplot(plotdata, aes(x = expected, y = observed, color = set)) + 
    theme_bw(base_line_size = 0.3) +
    geom_point(shape = 1, size = 1.5) +
    geom_abline(intercept = 0, slope = 1, alpha = 0.6, color = "black") +
    ggtitle(title) + 
    theme(plot.title = element_text(hjust = 0.5)) +
    labs(x = expression(paste("Expected -log"[10], "(p-value)")),
         y = expression(paste("Observed -log"[10], "(p-value)")),
         color = "Groups") +
    scale_color_manual(values = colors) +  # Use provided or default colors
    theme(axis.title.x = element_text(size = 14),
          axis.text.x = element_text(size = 12, color = "black"),
          axis.title.y = element_text(size = 14),
          axis.text.y = element_text(size = 12, color = "black"))
  
  return(qq)
}

For all independent SNPs, we generated 100 random samples using vSampler, matching the number of SNPs in LD and MAF. These randomly sampled SNPs were then used to perform factor–SNP association tests, from which we computed ACAT p-values for each factor–SNP pair. The resulting random ACAT p-values served as the null distributions to correct the original ACAT p-values.

We applied this correction at three levels: pair null, trait null, and global null.

  • Under the pair null, the null distribution was derived from the 100 ACAT p-values of each individual factor–SNP pair.
  • Under the trait null, all factors corresponding to a given trait were pooled to form the null distribution.
  • Under the global null, we pooled all ACAT p-values across factors and traits within each cell type.

When computing ACAT p-values, we used two settings:

  • All independent variants associated with the given trait.
  • Excluding variants within the MHC region.
folder_acat <- "/project/xinhe/xsun/pathway_factor/analysis/2.ACAT_v2/results/acat_correct_EUR/"

acat_allcelltypes <- list()
for (i in 1:length(celltypes)) {
  
  files_acat <- list.files(path = folder_acat, pattern = celltypes[i], full.names = T)
  
  acat_all <- c()
  for(file in files_acat) {
    
    acat <- readRDS(file)
    acat$trait <- sub(".*/[^-]+-(.*)_ACAT_corrected\\.RDS$", "\\1", file)
    acat$celltype <- celltypes[i]
    
    acat_all <- rbind(acat_all,acat)
  }
  
  acat_allcelltypes[[celltypes[i]]] <- acat_all
}

Detailed ACAT p-values for each pair

B cell (CD19+)

celltype <- "B_cell"


acat_celltype <- acat_allcelltypes[[celltype]]
acat_celltype <- acat_celltype[order(acat_celltype$acat_p),]

DT::datatable(acat_celltype,caption = htmltools::tags$caption( style = 'caption-side: left; text-align: left; color:black;  font-size:150% ;',paste0('TOP 100 pairs by Original ACAT p-values -- ', type[celltype])),options = list(pageLength = 10) )

Monocyte (CD14+)

celltype <- "CD14_positive_monocyte"


acat_celltype <- acat_allcelltypes[[celltype]]
acat_celltype <- acat_celltype[order(acat_celltype$acat_p),]

DT::datatable(acat_celltype,caption = htmltools::tags$caption( style = 'caption-side: left; text-align: left; color:black;  font-size:150% ;',paste0('TOP 100 pairs by Original ACAT p-values -- ', type[celltype])),options = list(pageLength = 10) )

Leukocyte (CD15+)

celltype <- "CD15_positive_leukocyte"


acat_celltype <- acat_allcelltypes[[celltype]]
acat_celltype <- acat_celltype[order(acat_celltype$acat_p),]

DT::datatable(acat_celltype,caption = htmltools::tags$caption( style = 'caption-side: left; text-align: left; color:black;  font-size:150% ;',paste0('TOP 100 pairs by Original ACAT p-values -- ', type[celltype])),options = list(pageLength = 10) )

Platelet

celltype <- "platelet"


acat_celltype <- acat_allcelltypes[[celltype]]
acat_celltype <- acat_celltype[order(acat_celltype$acat_p),]

DT::datatable(acat_celltype,caption = htmltools::tags$caption( style = 'caption-side: left; text-align: left; color:black;  font-size:150% ;',paste0('TOP 100 pairs by Original ACAT p-values -- ', type[celltype])),options = list(pageLength = 10) )

T cell (CD4+)

celltype <- "T_cell"


acat_celltype <- acat_allcelltypes[[celltype]]
acat_celltype <- acat_celltype[order(acat_celltype$acat_p),]

DT::datatable(acat_celltype,caption = htmltools::tags$caption( style = 'caption-side: left; text-align: left; color:black;  font-size:150% ;',paste0('TOP 100 pairs by Original ACAT p-values -- ', type[celltype])),options = list(pageLength = 10) )

T cell (CD8+)

celltype <- "thymocyte"


acat_celltype <- acat_allcelltypes[[celltype]]
acat_celltype <- acat_celltype[order(acat_celltype$acat_p),]

DT::datatable(acat_celltype,caption = htmltools::tags$caption( style = 'caption-side: left; text-align: left; color:black;  font-size:150% ;',paste0('TOP 100 pairs by Original ACAT p-values -- ', type[celltype])),options = list(pageLength = 10) )

Original ACAT p-values

MHC included

p1 <- list()
p2 <- list()
for (celltype in celltypes) {
  
  acat_celltype <- acat_allcelltypes[[celltype]]
  
  p <- as.numeric(acat_celltype$acat_p)
  p1[[celltype]] <- histogram(pvalues = p,title = type[celltype],xlab = "ACAT p-values from real data")
  p2[[celltype]] <- qqplot(pvalues = p,title = type[celltype])
  
}

grid.arrange(grobs=p1, nrow = 2)

Version Author Date
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13
grid.arrange(grobs=p2, nrow = 2)

Version Author Date
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

MHC removed

p1 <- list()
p2 <- list()
for (celltype in celltypes) {
  
  acat_celltype <- acat_allcelltypes[[celltype]]
  
  p <- as.numeric(acat_celltype$acat_p_MHCremoved)
  p1[[celltype]] <- histogram(pvalues = p,title = type[celltype],xlab = "ACAT p-values from real data, MHC removed")
  p2[[celltype]] <- qqplot(pvalues = p,title = type[celltype])
  
}

grid.arrange(grobs=p1, nrow = 2)

Version Author Date
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13
grid.arrange(grobs=p2, nrow = 2)

Version Author Date
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Corrected ACAT p-values (pair null)

We applied this correction at three levels: pair null, trait null, and global null.

  • Under the pair null, the null distribution was derived from the 100 ACAT p-values of each individual factor–SNP pair.
  • Under the trait null, all factors corresponding to a given trait were pooled to form the null distribution.
  • Under the global null, we pooled all ACAT p-values across factors and traits within each cell type.

When computing ACAT p-values, we used two settings:

  • All independent variants associated with the given
  • Excluding variants within the MHC region.

MHC included

p1 <- list()
p2 <- list()
for (celltype in celltypes) {
  
  acat_celltype <- acat_allcelltypes[[celltype]]
  
  p <- as.numeric(acat_celltype$acat_p_correct_pairnull)
  p1[[celltype]] <- histogram(pvalues = p,title = type[celltype],xlab = "Corrected ACAT p-values - Pair Null")
  p2[[celltype]] <- qqplot(pvalues = p,title = type[celltype])
  
}

grid.arrange(grobs=p1, nrow = 2)

Version Author Date
c151392 XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13
grid.arrange(grobs=p2, nrow = 2)

Version Author Date
c151392 XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

MHC removed

p1 <- list()
p2 <- list()
for (celltype in celltypes) {
  
  acat_celltype <- acat_allcelltypes[[celltype]]
  
  p <- as.numeric(acat_celltype$acat_p_correct_noMHC_pairnull)
  p1[[celltype]] <- histogram(pvalues = p,title = type[celltype],xlab = "Corrected ACAT p-values - Pair Null, MHC removed")
  p2[[celltype]] <- qqplot(pvalues = p,title = type[celltype])
  
}

grid.arrange(grobs=p1, nrow = 2)

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13
grid.arrange(grobs=p2, nrow = 2)

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Corrected ACAT p-values (trait null)

We applied this correction at three levels: pair null, trait null, and global null.

  • Under the pair null, the null distribution was derived from the 100 ACAT p-values of each individual factor–SNP pair.
  • Under the trait null, all factors corresponding to a given trait were pooled to form the null distribution.
  • Under the global null, we pooled all ACAT p-values across factors and traits within each cell type.

When computing ACAT p-values, we used two settings:

  • All independent variants associated with the given
  • Excluding variants within the MHC region.

MHC included

for (celltype in celltypes) {
  
  acat_celltype <- acat_allcelltypes[[celltype]]
  
  # traits <- unique(acat_celltype$trait)
  
  p1 <- list()
  p2 <- list()
  for (trait in traits){
    
    acat_trait <- acat_celltype[acat_celltype$trait == trait,]
    p <- as.numeric(acat_trait$acat_p_correct_traitnull)
    p1[[trait]] <- histogram(pvalues = p,title = trait,xlab = "Corrected ACAT p-values - trait Null")
    p2[[trait]] <- qqplot(pvalues = p,title = trait)
  }
  
  grid.arrange(grobs = p1, ncol = 6, top = type[celltype])
  grid.arrange(grobs = p2, ncol = 6, top = type[celltype])
}

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

MHC removed

for (celltype in celltypes) {
  
  acat_celltype <- acat_allcelltypes[[celltype]]
  
  # traits <- unique(acat_celltype$trait)
  
  p1 <- list()
  p2 <- list()
  for (trait in traits){
    
    acat_trait <- acat_celltype[acat_celltype$trait == trait,]
    p <- as.numeric(acat_trait$acat_p_correct_noMHC_traitnull)
    p1[[trait]] <- histogram(pvalues = p,title = trait,xlab = "Corrected ACAT p-values - trait Null, MHC removed")
    p2[[trait]] <- qqplot(pvalues = p,title = trait)
  }
  
  grid.arrange(grobs = p1, ncol = 6, top = type[celltype])
  grid.arrange(grobs = p2, ncol = 6, top = type[celltype])
}

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13

Version Author Date
c151392 XSun 2025-11-14
928d43b XSun 2025-11-14
e1a196e XSun 2025-11-14
3d5fa40 XSun 2025-11-13